{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "e3a62606",
   "metadata": {},
   "source": [
    "# Linear Algebra in Python: Matrix Inverses and Least-Squares"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a271640b",
   "metadata": {},
   "source": [
    "## Vectors, Matrices, and the Role of Linear Algebra"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e79727ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "np.array([[1, 2], [3, 4], [5, 6]])\n",
    "# Expected:\n",
    "# array([[1, 2],\n",
    "#        [3, 4],\n",
    "#        [5, 6]])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7664dd00",
   "metadata": {},
   "source": [
    "## Calculating Inverses and Determinants With `scipy.linalg`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "976f8420",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from scipy import linalg\n",
    "\n",
    "# Coefficients of the linear system A @ x = b, one equation per row.\n",
    "A = np.array(\n",
    "    [\n",
    "        [1, 9, 2, 1, 1],\n",
    "        [10, 1, 2, 1, 1],\n",
    "        [1, 0, 5, 1, 1],\n",
    "        [2, 1, 1, 2, 9],\n",
    "        [2, 1, 2, 13, 2],\n",
    "    ]\n",
    ")\n",
    "# Right-hand side written directly as a 5x1 column vector.\n",
    "b = np.array([[170], [180], [140], [180], [350]])\n",
    "\n",
    "# Solve the system by inverting A and multiplying the inverse by b.\n",
    "A_inv = linalg.inv(A)\n",
    "x = np.matmul(A_inv, b)\n",
    "x\n",
    "# Expected:\n",
    "# array([[10.],\n",
    "#        [10.],\n",
    "#        [20.],\n",
    "#        [20.],\n",
    "#        [10.]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b4edfe0e-2fea-453d-808b-33e238cd1c67",
   "metadata": {},
   "outputs": [],
   "source": [
    "linalg.det(A)\n",
    "# Expected:\n",
    "# 45102.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cc9c209d-08f7-4616-b06d-4b28aca0bd86",
   "metadata": {},
   "outputs": [],
   "source": [
    "A_inv\n",
    "# Expected:\n",
    "# array([[-0.01077558,  0.10655847, -0.03565252, -0.0058534 , -0.00372489],\n",
    "#        [ 0.11287748, -0.00512172, -0.04010909, -0.00658507, -0.0041905 ],\n",
    "#        [ 0.0052991 , -0.01536517,  0.21300608, -0.01975522, -0.0125715 ],\n",
    "#        [-0.0064077 , -0.01070906, -0.02325839, -0.01376879,  0.08214713],\n",
    "#        [-0.00931223, -0.01902355, -0.00611946,  0.1183983 , -0.01556472]])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5178883b",
   "metadata": {},
   "source": [
    "## Interpolating Polynomials With Linear Systems"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a8fb7d47",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from scipy import linalg\n",
    "\n",
    "A = np.array([[1, 1, 1], [1, 2, 4], [1, 3, 9]])\n",
    "linalg.det(A)\n",
    "# Expected:\n",
    "# 1.9999999999999996"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f85cde9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Right-hand side as a 3x1 column vector.\n",
    "b = np.array([[5], [13], [25]])\n",
    "# Coefficient vector obtained by multiplying the inverse of A by b.\n",
    "a = np.matmul(linalg.inv(A), b)\n",
    "a\n",
    "# Expected:\n",
    "# array([[1.],\n",
    "#        [2.],\n",
    "#        [2.]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "411c8b42",
   "metadata": {},
   "outputs": [],
   "source": [
    "A = np.array([[1, 1, 1], [1, 2, 4], [1, 2, 4]])\n",
    "linalg.det(A)\n",
    "# Expected:\n",
    "# 0.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9f0a9ba8",
   "metadata": {},
   "outputs": [],
   "source": [
    "b = np.array([5, 13, 25]).reshape((3, 1))\n",
    "# A is singular at this point (its determinant is 0), so it has no inverse.\n",
    "# Catch the resulting error and display it instead of letting it abort the\n",
    "# run, so the notebook still executes from top to bottom.\n",
    "try:\n",
    "    x = linalg.inv(A) @ b\n",
    "except linalg.LinAlgError as error:\n",
    "    print(f\"{type(error).__name__}: {error}\")\n",
    "# Expected:\n",
    "# LinAlgError: singular matrix"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "62117d42",
   "metadata": {},
   "source": [
    "## Predicting Prices With Least Squares"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "568272ff",
   "metadata": {},
   "source": [
    "### Building Least Squares Models Using `scipy.linalg`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c551988e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from scipy import linalg\n",
    "\n",
    "# The last two rows of A are identical, so A cannot be inverted.\n",
    "A = np.array([[1, 1, 1], [1, 2, 4], [1, 2, 4]])\n",
    "b = np.array([[5], [13], [25]])\n",
    "\n",
    "# lstsq() returns several values; only the first one (the solution) is kept.\n",
    "p = linalg.lstsq(A, b)[0]\n",
    "p\n",
    "# Expected:\n",
    "# array([[-0.42857143],\n",
    "#        [ 1.14285714],\n",
    "#        [ 4.28571429]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9076e0f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Evaluate the fitted polynomial p[0] + p[1]*x + p[2]*x**2 on a dense grid.\n",
    "x = np.linspace(0, 3, 1000)\n",
    "y = p[0] + p[1] * x + p[2] * x**2\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "ax.plot(x, y, label=\"least-squares fit\")\n",
    "# The three points (1, 5), (2, 13) and (2, 25) drawn as red dots.\n",
    "ax.plot([1, 2, 2], [5, 13, 25], \"ro\", label=\"data points\")\n",
    "ax.set(xlabel=\"x\", ylabel=\"y\", title=\"Least-squares polynomial fit\")\n",
    "ax.legend()\n",
    "# plt.show() renders the figure without echoing a Line2D repr as cell output.\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0dbb9566",
   "metadata": {},
   "source": [
    "### Obtaining Least Squares Solutions Using Pseudoinverse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "da90dab6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from scipy import linalg\n",
    "\n",
    "# Same singular system as in the lstsq() example above.\n",
    "A = np.array([[1, 1, 1], [1, 2, 4], [1, 2, 4]])\n",
    "b = np.array([[5], [13], [25]])\n",
    "\n",
    "# Multiply the pseudoinverse of A by b to get the coefficient vector.\n",
    "A_pinv = linalg.pinv(A)\n",
    "p2 = np.matmul(A_pinv, b)\n",
    "p2\n",
    "# Expected:\n",
    "# array([[-0.42857143],\n",
    "#        [ 1.14285714],\n",
    "#        [ 4.28571429]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0523bc44",
   "metadata": {},
   "outputs": [],
   "source": [
    "A_pinv\n",
    "# Expected:\n",
    "# array([[ 1.        , -0.14285714, -0.14285714],\n",
    "#        [ 0.5       , -0.03571429, -0.03571429],\n",
    "#        [-0.5       ,  0.17857143,  0.17857143]])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "387df12e",
   "metadata": {},
   "source": [
    "## Example: Predicting Car Prices With Least Squares"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0e2164e6",
   "metadata": {},
   "source": [
    "### Preparing the Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2e2f65ed",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "cars_data = pd.read_csv(\"vehicles_cleaned.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1ca58328",
   "metadata": {},
   "outputs": [],
   "source": [
    "cars_data.columns\n",
    "# Expected:\n",
    "# Index(['price', 'year', 'condition', 'cylinders', 'fuel', 'odometer',\n",
    "#        'transmission', 'size', 'type'],\n",
    "#       dtype='object')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "da4af434",
   "metadata": {},
   "outputs": [],
   "source": [
    "cars_data.iloc[0]\n",
    "# Expected:\n",
    "# price                  7000\n",
    "# year                   2011\n",
    "# condition              good\n",
    "# cylinders       4 cylinders\n",
    "# fuel                    gas\n",
    "# odometer              76202\n",
    "# transmission      automatic\n",
    "# size                compact\n",
    "# type                  sedan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9a909bc8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Columns to replace with dummy (indicator) columns.\n",
    "categorical_columns = [\n",
    "    \"condition\",\n",
    "    \"cylinders\",\n",
    "    \"fuel\",\n",
    "    \"transmission\",\n",
    "    \"size\",\n",
    "    \"type\",\n",
    "]\n",
    "\n",
    "# drop_first=True omits the dummy column for the first level of each variable.\n",
    "cars_data_dummies = pd.get_dummies(\n",
    "    cars_data, columns=categorical_columns, drop_first=True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9680749b",
   "metadata": {},
   "outputs": [],
   "source": [
    "cars_data_dummies.columns\n",
    "# Expected:\n",
    "# Index(['price', 'year', 'odometer', 'condition_fair', 'condition_good',\n",
    "#        'condition_like new', 'condition_new', 'condition_salvage',\n",
    "#        'cylinders_6 cylinders', 'fuel_gas', 'transmission_manual',\n",
    "#        'size_full-size', 'size_mid-size', 'size_sub-compact', 'type_hatchback',\n",
    "#        'type_sedan', 'type_wagon'],\n",
    "#       dtype='object')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ca67b328",
   "metadata": {},
   "outputs": [],
   "source": [
    "cars_data_dummies[\"intercept\"] = 1"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "07e580f1",
   "metadata": {},
   "source": [
    "### Building the Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b69406fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Design matrix: every column except the target, cast explicitly to float.\n",
    "# Since pandas 2.0, get_dummies() produces bool columns by default, and a plain\n",
    "# to_numpy() on a frame mixing numeric and bool columns yields an object array,\n",
    "# which linalg.lstsq() and linalg.pinv() cannot process.\n",
    "A = cars_data_dummies.drop(columns=[\"price\"]).to_numpy(dtype=float)\n",
    "# Target vector: the price column.\n",
    "b = cars_data_dummies.loc[:, \"price\"].to_numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bf925c82",
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy import linalg\n",
    "\n",
    "# lstsq() returns several values; only the first one (the solution) is kept.\n",
    "p = linalg.lstsq(A, b)[0]\n",
    "p\n",
    "# Expected:\n",
    "# array([ 8.47362988e+02, -3.53913729e-02, -3.47144752e+03, -1.66981155e+03,\n",
    "#        -1.80240398e+02, -7.15885691e+03, -6.36540791e+03,  3.76583261e+03,\n",
    "#        -1.84837210e+03,  1.31935783e+03,  6.60484388e+02,  6.38913933e+02,\n",
    "#         1.54163679e+02, -1.76423109e+03, -1.99439766e+03,  6.97365788e+02,\n",
    "#        -1.68998811e+06])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2c973c07",
   "metadata": {},
   "outputs": [],
   "source": [
    "p2 = linalg.pinv(A) @ b\n",
    "p2\n",
    "# Expected:\n",
    "# array([ 8.47362988e+02, -3.53913729e-02, -3.47144752e+03, -1.66981155e+03,\n",
    "#        -1.80240398e+02, -7.15885691e+03, -6.36540791e+03,  3.76583261e+03,\n",
    "#        -1.84837210e+03,  1.31935783e+03,  6.60484388e+02,  6.38913933e+02,\n",
    "#         1.54163679e+02, -1.76423109e+03, -1.99439766e+03,  6.97365788e+02,\n",
    "#        -1.68998811e+06])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "22e1a286",
   "metadata": {},
   "source": [
    "### Predicting Prices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c4c2b344",
   "metadata": {},
   "outputs": [],
   "source": [
    "cars_data_dummies.drop(columns=[\"price\"]).columns\n",
    "# Expected:\n",
    "# Index(['year', 'odometer', 'condition_fair', 'condition_good',\n",
    "#        'condition_like new', 'condition_new', 'condition_salvage',\n",
    "#        'cylinders_6 cylinders', 'fuel_gas', 'transmission_manual',\n",
    "#        'size_full-size', 'size_mid-size', 'size_sub-compact', 'type_hatchback',\n",
    "#        'type_sedan', 'type_wagon', 'intercept'],\n",
    "#       dtype='object')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cf300c09",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "car = np.array([2010, 50000, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "85d68787",
   "metadata": {},
   "outputs": [],
   "source": [
    "predicted_price = p @ car\n",
    "predicted_price\n",
    "# Expected:\n",
    "# 6159.510724281656"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language": "python",
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
